In [1]:
from collections import OrderedDict
import datetime
import numpy as np
import pandas as pd
from IPython.display import HTML
import statsmodels.api as sm
from break4w.categorical import Categorical
from break4w.continous import Continous
from break4w.question import Question
from break4w.bool import Bool
from break4w.data_dictionary import DataDictionary
I'm going to try to build a data dictionary object using columns from an example data dictionary and a study I worked with a while ago. I'll start by assuming we can convert a text document into a series of dictionaries to build from. I'm going to use the data description from the statsmodels American National Election Studies 1996 (`anes96`) dataset.
In [2]:
# Load the `anes96` example dataset that ships with statsmodels, then wrap its
# records in a pandas DataFrame.
anes96_dataset = sm.datasets.anes96.load()
data_ = pd.DataFrame(anes96_dataset.data)
In [3]:
# Column descriptions for a handful of ANES96 variables. Each dict holds the
# keyword arguments used to construct one break4w question object.
columns = [
    {
        'name': 'popul',
        'description': 'Census place population in 1000s',
        'dtype': float,
        'units': 'people',
        'magnitude': 1000,
    },
    {
        'name': 'TVnews',
        'description': 'Number of times per week that respondent watches TV news.',
        'dtype': int,
        'units': 'views per week',
        'clean_name': 'TV news',
        'limits': [0, None],
    },
    {
        'name': 'PID',
        'description': 'Party identification of respondent',
        'dtype': int,
        'order': [0, 1, 2, 3, 4, 5, 6],
        'numeric_mapping': {
            0: 'Strong Democrat',
            1: 'Weak Democrat',
            2: 'Independent-Democrat',
            # Label typo ('Independent-Indpendent') corrected.
            3: 'Independent-Independent',
            4: 'Independent-Republican',
            5: 'Weak Republican',
            6: 'Strong Republican',
        },
    },
    {
        'name': 'vote',
        'description': 'Individual expected to vote for Bob Dole',
        'dtype': bool,
    },
]

# One question-type label per entry in `columns`, in the same order. The list
# must be as long as `columns`: positional pairing (e.g. with `zip`) stops at
# the shorter sequence, so a missing label silently drops the trailing column.
# The 'bool' label for the 'vote' column was previously missing.
types = ['continous', 'question', 'categorical', 'bool']
In [4]:
# Map every accepted question-type label to the break4w class that builds it.
# Several labels are aliases for the same class; labels not listed here are
# left to the caller's fallback (the construction loop defaults to `Question`).
type_lookup = {
    label: question_class
    for question_class, labels in (
        (Continous, ('continous',)),
        (Categorical, ('categorical', 'multiple choice', 'ordinal')),
        (Bool, ('bool', 'boolean', 'yes/no')),
    )
    for label in labels
}
In [5]:
# Build one question object per column description, keyed by column name and
# kept in the order the columns were listed.
proto_dict = OrderedDict()
# `zip` pairs descriptions with type labels positionally and stops at the
# shorter sequence, so `columns` and `types` are expected to be equally long.
for col_, type_ in zip(columns, types):
    # Labels are matched case-insensitively; anything not in `type_lookup`
    # (e.g. 'question') falls back to the generic Question class.
    question_type = type_lookup.get(type_.lower(), Question)
    proto_dict[col_['name']] = question_type(**col_)
In [6]:
# Inspect the 'popul' question object via its `to_dict()` method.
proto_dict['popul'].to_dict()
Out[6]:
In [7]:
# NOTE(review): this re-creates `type_lookup` with exactly the same entries as
# the earlier cell that first defines it; the cell is redundant and could be
# removed.
type_lookup = dict([
    ('continous', Continous),
    ('categorical', Categorical),
    ('multiple choice', Categorical),
    ('ordinal', Categorical),
    ('bool', Bool),
    ('boolean', Bool),
    ('yes/no', Bool),
])
In [8]:
# Display the ordered mapping of column name -> question object.
proto_dict
Out[8]:
In [ ]:
In [9]:
# Build a DataDictionary directly from the column description dicts and their
# type labels (the same two lists used for the manual loop above).
dict_ = DataDictionary(columns, types)
In [10]:
# Show the data dictionary's plain-text (str) representation.
print(dict_)
In [11]:
# Convert the data dictionary with its `to_dataframe()` method and keep the
# result for display.
df_ = dict_.to_dataframe()
In [12]:
# Display the result of `dict_.to_dataframe()`.
df_
Out[12]:
In [ ]:
In [ ]:
# Display `df_` again (same expression as the earlier display cell).
df_
In [ ]:
# NOTE(review): `test` is not assigned anywhere in the visible notebook —
# presumably a DataDictionary created in a cell that was later deleted. Define
# it before this cell, otherwise a fresh-kernel run raises NameError here.
# Add a question from a (description dict, type label) pair.
test.add_question(columns[0], types[0])
# Add a question from an already-constructed Continous object.
test.add_question(Continous(**columns[1]))
# Same column description again, this time with its type label ('question');
# presumably exercising how a repeated column name is handled — TODO confirm.
test.add_question(columns[1], types[1])
In [ ]:
# List the keys of `test.columns`.
list(test.columns.keys())
In [ ]:
# NOTE(review): this rebinds `columns`, which until now held the list of column
# description dicts, to `test.columns`. Later cells that index `columns` by
# position (e.g. `columns[2]`) will operate on this object instead — consider a
# distinct name.
columns = test.columns
In [ ]:
# Display the object now bound to `columns` (taken from `test.columns`).
columns
In [ ]:
# Remove the 'popul' entry through the `columns` reference; the following cells
# display `columns` and `test.columns`, presumably to see whether the deletion
# also shows up on `test` (i.e. whether `test.columns` hands back a shared
# object or a copy) — TODO confirm intent.
del columns['popul']
In [ ]:
# Display `columns` after the 'popul' entry was deleted from it.
columns
In [ ]:
# Display `test.columns` for comparison with the local `columns` reference.
test.columns
In [ ]:
# Display the `log` attribute of `test`.
test.log
In [ ]:
# NOTE(review): `columns` was rebound to `test.columns` in an earlier cell, so
# `columns[2]` no longer selects the third description dict ('PID') from the
# original list; on a name-keyed mapping this lookup would presumably raise
# KeyError — verify, or index the original list under a different name.
test.add_question(columns[2], types[2])
In [ ]:
# Grab the attribute dictionary of the 'popul' entry. `vars()` returns the
# object's own `__dict__`, not a copy, so attribute changes made later on that
# same object are visible through `current`.
current = vars(test['popul'])
In [ ]:
# Attribute updates to apply to the 'popul' entry: attribute name -> new value.
new = dict(
    blanks='not applicable',
    frog='Chowder',
)
In [ ]:
# Record, for every requested update, a (previous value, new value) pair and
# then apply the update to the 'popul' entry. Attributes that do not exist yet
# are recorded with the marker 'add' in place of a previous value.
change_keys = {}
for attr_name, attr_value in new.items():
    previous = current[attr_name] if attr_name in current else 'add'
    change_keys[attr_name] = (previous, attr_value)
    # Looked up afresh on every iteration, exactly as before.
    setattr(test['popul'], attr_name, attr_value)
In [ ]:
# Read back the `frog` attribute ('frog' is one of the keys in `new`) from the
# 'popul' entry.
test['popul'].frog
In [ ]:
# Keep a local reference to whatever `test['popul']` returns.
check = test['popul']
In [ ]:
# Set attribute `cat` on `check` to the string 'None' (a str, not the None
# singleton).
check.cat = 'None'
In [ ]:
# Read the attribute back through the local reference.
check.cat
In [ ]:
# Read the same attribute through a fresh `test['popul']` lookup; presumably
# checks whether the lookup returns the same object that `check` refers to —
# TODO confirm intent.
test['popul'].cat
In [ ]:
# Show every attribute currently stored on `check`.
vars(check)
In [ ]: